package com.lzx.demo;

import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;

import java.io.IOException;
import java.util.ArrayList;
import java.util.LinkedHashSet;
import java.util.List;
import java.util.Set;

/**
 * @author 程序员星星
 * @date 2023/3/12
 * @Description Demo crawler: collects the article links of a CSDN blog and prints the text of each article.
 */
public class Test2 {
    /** Path appended to the blog base URL; the 1-based page number follows it. */
    private static final String ARTICLE_LIST_PATH = "article/list/";

    /** CSS selector matching the title link of every entry on an article-list page. */
    private static final String ARTICLE_LINK_SELECTOR = "div.article-list div.article-item-box h4 a";

    /** CSS selector matching the container that holds an article's body. */
    private static final String ARTICLE_BODY_SELECTOR = "div.article_content";

    /**
     * Entry point: collects every article link of the hard-coded blog and prints
     * the text of each article to standard output.
     *
     * @param args command-line arguments (unused)
     * @throws IOException if any page cannot be fetched
     */
    public static void main(String[] args) throws IOException {
        String baseUrl = "https://blog.csdn.net/crazy1013/";
        List<String> articleUrls = getAllArticleUrls(baseUrl); // collect all article links
        for (String articleUrl : articleUrls) {
            String articleContent = getArticleContent(articleUrl); // fetch the article text
            System.out.println(articleContent); // print the article text
        }
    }

    /**
     * Collects the links of all articles by walking the paginated article list
     * ({@code <baseUrl>/article/list/<page>}), starting at page 1.
     *
     * <p>Paging stops at the first page that contributes no new link. That covers an
     * empty page as well as a server that keeps answering out-of-range page numbers
     * with an already-seen page, which would otherwise loop forever.
     *
     * @param baseUrl root URL of the blog; a trailing {@code /} is optional
     * @return the distinct absolute article URLs, in the order they were first seen
     * @throws IOException if a list page cannot be fetched
     */
    public static List<String> getAllArticleUrls(String baseUrl) throws IOException {
        // Tolerate a base URL without trailing slash so the page URL is always well formed.
        String listRoot = baseUrl.endsWith("/") ? baseUrl : baseUrl + "/";

        // Insertion-ordered set: keeps discovery order while dropping repeated links.
        Set<String> articleUrls = new LinkedHashSet<>();
        for (int pageIndex = 1; ; pageIndex++) {
            Document doc = Jsoup.connect(listRoot + ARTICLE_LIST_PATH + pageIndex).get();
            Elements articleLinks = doc.select(ARTICLE_LINK_SELECTOR);

            boolean foundNewLink = false;
            for (Element link : articleLinks) {
                // absUrl resolves relative hrefs against the page URL and yields ""
                // when the attribute is missing or cannot be turned into a URL.
                String href = link.absUrl("href");
                if (!href.isEmpty() && articleUrls.add(href)) {
                    foundNewLink = true;
                }
            }
            if (!foundNewLink) {
                break; // nothing new on this page: all article links have been collected
            }
        }
        return new ArrayList<>(articleUrls);
    }

    /**
     * Fetches an article page and returns the text of its body container
     * ({@code div.article_content}).
     *
     * @param articleUrl absolute URL of the article page
     * @return the text of the article body, or an empty string when the page has no
     *         such container (for example a removed or access-restricted article)
     * @throws IOException if the page cannot be fetched
     */
    public static String getArticleContent(String articleUrl) throws IOException {
        Document doc = Jsoup.connect(articleUrl).get();
        Element content = doc.select(ARTICLE_BODY_SELECTOR).first();
        // first() returns null when nothing matches; avoid a NullPointerException.
        return content == null ? "" : content.text();
    }
}
